In [40]:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import time
import tarfile
from IPython.display import display, Image
from scipy import ndimage
from sklearn.linear_model import LogisticRegression
from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle
import tensorflow as tf
import scipy

In [131]:
# Network predictions (10 outputs per sample) and ground truth; true_out[:,8:] holds the 10 target values
network_out = np.loadtxt("../code/test_track_out.dat")
true_out = np.loadtxt("../data/ann_dataset_10points_combined.out")
print(network_out.shape)
# Side by side: columns 0-9 are the predictions, columns 10-19 the corresponding observations
predicted_vs_actual = np.hstack((network_out, true_out[:,8:]))
print(predicted_vs_actual[:2,:])


(62532, 10)
[[ 0.08869374  0.07273774  0.07650217  0.07998341  0.10801552  0.11094129
   0.33401603  0.12339592  0.18891034  0.26151556  0.0054      0.0051
   0.0056      0.0059      0.0068      0.0071      0.0114      0.0122
   0.0134      0.0167    ]
 [-0.00694975  0.00128717  0.01496709  0.00864799  0.00684242  0.03885008
   0.12532118  0.02930014  0.04543016  0.04305997  0.0041      0.0048
   0.0056      0.0061      0.0058      0.0071      0.01        0.0108
   0.0138      0.0159    ]]
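
As a quick sanity check, the 62,532 rows should decompose into whole tracks of 193 samples each, since the plots below index the data in blocks of 193. A minimal sketch, assuming that fixed track length:

In [ ]:
# Hypothetical sanity check: the test set should split evenly into 193-sample tracks
track_len = 193
n_samples = network_out.shape[0]
assert n_samples % track_len == 0, "data is not a whole number of tracks"
print("%d tracks of %d samples" % (n_samples // track_len, track_len))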

In [128]:
# Predicted output 0 for the first two 193-sample tracks
plt.plot(network_out[:2*193,0])
plt.show()

In [132]:
# Find the sample with the largest absolute error on output 5
i = 5
k = np.argmax(np.abs(predicted_vs_actual[:,i]-predicted_vs_actual[:,i+10]))
print(k)
# Absolute errors on all 10 outputs for that worst-case sample
max_error_case = [np.abs(predicted_vs_actual[k,j]-predicted_vs_actual[k,j+10]) for j in range(10)]
print(max_error_case)
# Plot the first 20 tracks of 193 samples each.
# To isolate the worst-case track instead: start = (k // 193)*193; stop = start + 193
start = 0
stop = 193*20

print(predicted_vs_actual.shape)
fig = plt.figure(figsize=(10, 6), dpi=80)
# One subplot per output: prediction (blue) vs. observation (red)
for i in range(10):
    sp = fig.add_subplot(10,1,i+1)
    if i <= 4:
        sp.set_ylim([-0.5, 3.0])
    else:
        sp.set_ylim([-0.5, 3.5])
    sp.plot(predicted_vs_actual[start:stop,i],color="blue", linewidth=1.5, linestyle="-", label="prediction")
    sp.plot(predicted_vs_actual[start:stop,i+10],color="red", linewidth=1.5, linestyle="-", label="observation")
#plt.legend(loc='upper right')
plt.show()


36239
[0.31081665000915526, 0.22333270440101624, 0.019749852007627487, 0.011854489183425909, 0.49102901506423946, 1.4919900444626808, 0.051354314613342278, 0.16495859050750733, 0.031382253324985503, 0.068443360328674319]
(62532, 20)
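
The commented-out start/stop values above hint at zooming in on the single track that contains the worst-case sample; a minimal sketch of that variant, assuming the same 193-sample track length:

In [ ]:
# Sketch: plot only the 193-sample track containing the max-error sample k
start = (k // 193) * 193
stop = start + 193
fig = plt.figure(figsize=(10, 6), dpi=80)
for j in range(10):
    sp = fig.add_subplot(10, 1, j + 1)
    sp.plot(predicted_vs_actual[start:stop, j], color="blue", label="prediction")
    sp.plot(predicted_vs_actual[start:stop, j + 10], color="red", label="observation")
plt.show()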

In [134]:
from sklearn.metrics import explained_variance_score
import scipy.stats as stats
import pylab

diff = (network_out-true_out[:,8:])
diff = diff[:,5]
print(diff.shape)

# Q-Q plot of the output-5 errors against a t distribution with 2 degrees of freedom
#stats.probplot(diff, dist="norm", plot=pylab)
stats.probplot(diff, dist="t", sparams=(2,), plot=pylab)
pylab.show()

num_bins = 100
# the normalized histogram of the errors
n, bins, patches = plt.hist(diff, num_bins, density=True, facecolor='blue', alpha=0.5)

# Maximum-likelihood fit of a Student's t distribution: (df, loc, scale)
params = stats.t.fit(diff)
print(params)
dist = stats.t(*params)
x = np.linspace(-2, 2, num_bins)
plt.plot(x, dist.pdf(x), 'r-', lw = 3, alpha=0.5, label='t pdf')
plt.show()
print(params)


(62532,)
(1.1237318642940517, -0.0024490862555934271, 0.011467140514013891)
(1.1237318642940517, -0.0024490862555934271, 0.011467140514013891)

Test distribution of errors: the best fit (R^2 = 0.855) is a Student's t with 1.067 DOF, location 0.003919 (the network slightly overestimates), and scale 0.01165. However, the t distribution overestimates the probability of large errors.
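
The R^2 quoted above can be read off directly from probplot, which returns the correlation coefficient of its least-squares fit when no plot is requested; a minimal sketch, treating the 1.067 DOF as an already-chosen shape parameter:

In [ ]:
# probplot returns ((osm, osr), (slope, intercept, r)); r**2 measures how straight the Q-Q plot is
(osm, osr), (slope, intercept, r) = stats.probplot(diff, dist="t", sparams=(1.067,))
print("R^2 of the probability plot fit: %.3f" % (r**2))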


In [135]:
import scipy
import scipy.stats

diff = (network_out-true_out[:,8:])
#print(diff.shape)
y = diff[:,5]
print(y.shape)
#y = np.square(y)
x = np.arange(-3,3,0.01)
size = diff.shape[0]

# Raw-count histogram of the output-5 errors
h = plt.hist(y, bins=100,  color='w')
plt.xlim(-3,3)
plt.ylim(0,1000)

dist_names = ['t']

# Fit each candidate distribution; rescale its pdf roughly to the raw-count histogram before overlaying
for dist_name in dist_names:
    dist = getattr(scipy.stats, dist_name)
    param = dist.fit(y)
    #param = (1.5, param[1], param[2])
    pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2], scale=param[-1])*2000
    plt.plot(x, pdf_fitted, label=dist_name)
plt.legend(loc='upper right')

plt.show()


(62532,)

In [136]:
import scipy.stats as stats
# Q-Q plot of the output-5 errors against a t distribution with 2 degrees of freedom
stats.probplot(y, dist="t", sparams=(2,), plot=pylab)
pylab.show()

print(param)
print(*param[:-2])
print(param[-2])
print(param[-1])


(1.1237318642940517, -0.0024490862555934271, 0.011467140514013891)
1.12373186429
-0.00244908625559
0.011467140514

Attempts at nonparametric estimation of the error distribution (Gaussian kernel density estimation)


In [143]:
# Gaussian kernel density estimation
from scipy import stats
import matplotlib.pyplot as plt

# Errors on output 4 (the cells above used output 5)
y = diff[:,4]
print(y.shape)
# Gaussian KDE with the default (Scott's rule) bandwidth
kde1 = stats.gaussian_kde(y)
#kde2 = stats.gaussian_kde(y, bw_method='silverman')

fig = plt.figure()
ax = fig.add_subplot(111)

#ax.plot(y, np.zeros(y.shape), 'b+', ms=20)  # rug plot
x_eval = np.linspace(-2, 2, num=200)
#ax.plot(x_eval, kde1(x_eval), 'k-', label="Scott's Rule")
#ax.plot(x_eval, kde2(x_eval), 'r-', label="Silverman's Rule")
#plt.legend(loc='upper right')
#plt.show()

err_min, err_max = -0.1,0.1

# Probability mass of the KDE between err_min and err_max
print("Probability of the error between %.2f and %.2f meters: %.4f"%(err_min, err_max,kde1.integrate_box_1d(err_min, err_max)))


(62532,)
Probability of the error between -0.10 and 0.10 meters: 0.9672
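
The KDE also answers the inverse question: how wide must a symmetric error band be to reach a given coverage? A minimal sketch using plain bisection on integrate_box_1d; the 95% target and the 1 m search bracket are illustration values, not results from the analysis above:

In [ ]:
# Sketch: find the symmetric bound b with P(-b < error < b) close to the assumed target coverage
target = 0.95
lo, hi = 0.0, 1.0
for _ in range(50):
    mid = 0.5 * (lo + hi)
    if kde1.integrate_box_1d(-mid, mid) < target:
        lo = mid
    else:
        hi = mid
print("Approximate %.0f%% error bound: +/- %.4f m" % (100 * target, hi))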

In [189]:
### Gaussian kernel density estimation for "active" portions of the data only
from scipy import stats
import matplotlib.pyplot as plt

# Time-step window (within each 193-sample track) considered "active"
active_start = 130
active_stop = 160
# Errors on output 0
y = diff[:,0]

# Split into 193-sample tracks, keep time steps active_start:active_stop of every track, and flatten
active_y = y.reshape(-1,193).transpose()[active_start:active_stop,:].reshape(-1)

print("Min: %.4fm, Max: %.4fm" %(np.amin(active_y),np.amax(active_y)))
kde1 = stats.gaussian_kde(active_y)
kde2 = stats.gaussian_kde(y, bw_method='silverman')

#fig = plt.figure()
#ax = fig.add_subplot(111)

#ax.plot(active_y, np.zeros(active_y.shape), 'b+', ms=20)  # rug plot
x_eval = np.linspace(-2, 2, num=200)
#ax.plot(x_eval, kde1(x_eval), 'k-', label="Scott's Rule")
#ax.plot(x_eval, kde2(x_eval), 'r-', label="Silverman's Rule")
#plt.legend(loc='upper right')
#plt.show()

err_min, err_max = -0.13,0.13

print("Probability of the error between %.2f and %.2f meters: %.4f"%(err_min, err_max,kde1.integrate_box_1d(err_min, err_max)))


Min: -1.1435m, Max: 0.4936m
Probability of the error between -0.13 and 0.13 meters: 0.9519
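
The reshape/transpose/slice used to extract the active window is easy to get wrong, so a toy check on a small array (track length 4 and window 1:3 are arbitrary illustration values) confirms that it selects the same time steps from every track:

In [ ]:
# Toy check of the track-window extraction (arbitrary small sizes for illustration)
toy = np.arange(12)              # 3 "tracks" of 4 samples each: [0..3], [4..7], [8..11]
window = toy.reshape(-1, 4).transpose()[1:3, :].reshape(-1)
print(window)                    # time steps 1-2 of each track: [ 1  5  9  2  6 10]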